# -*- coding: utf-8 -*-
from urllib import parse

import requests
from lxml import etree

# Browser-like request headers: the site blocks requests that lack a real
# User-Agent/Referer, so we mimic a Chrome-on-Windows visit.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.66',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Referer': 'https://www.qiushibaike.com/text/',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
}
# Crawl the /text/ section page by page. For each joke we print a dict of
# (content, vote, comments, first-comment text) and append the joke text to
# wordCloud.txt for later word-cloud generation. The loop ends when no
# "next page" link is found.
next_page = "/text/page/1/"
# Open the output file once for the whole crawl instead of re-opening it
# in append mode for every single item.
with open('wordCloud.txt', 'a', encoding='utf-8') as word_file:
    while next_page:
        # timeout keeps a stalled connection from hanging the crawl forever
        response = requests.get(
            url=parse.urljoin('https://www.qiushibaike.com', next_page),
            headers=headers,
            timeout=10,
        )
        html = etree.HTML(response.content)
        infos = html.xpath("//div[contains(@class,'article block untagged mb15')]")
        for one in infos:
            # string(.) flattens any nested markup inside the content div
            content = one.xpath("string(.//div[@class='content'])")
            vote = one.xpath(".//div[@class='stats']/span[@class='stats-vote']//i/text()")
            # NOTE: str when present, int 0 when missing — kept for
            # backward-compatible output
            vote = vote[0] if vote else 0
            comments = one.xpath(".//div[@class='stats']/span[@class='stats-comments']//i/text()")
            comments = comments[0] if comments else 0
            cmt_main = "".join(one.xpath(".//div[@class='cmtMain']/div[@class='main-text']/text()")).strip()
            item = {
                "content": content.strip(),
                "vote": vote,
                "comments": comments,
                "cmt_main": cmt_main,
            }
            print(item)
            print("*" * 100)
            # Save the crawl result to wordCloud.txt for the word cloud.
            word_file.write(item['content'])
        # The <a> wrapping the "next" span carries the relative href of the
        # next page; it is absent on the last page, which terminates the loop.
        next_page = html.xpath("//span[@class='next']/../@href")
        next_page = next_page[0] if next_page else None
        print(next_page)

